Code
# analysis
library("DHARMa")
library("car")
library("lme4")
library("glmmTMB")
# plotting
library("tidyverse")
library("ggplot2")
library("patchwork")
library("RColorBrewer")
theme_set(theme_bw())Analyses of herbivory and chemistry data for the article, “Growth-defense trade-offs facilitate species coexistence: empirical evidence from recently diverged tropical plants.”
Loading packages and setting defaults:
# analysis
library("DHARMa")
library("car")
library("lme4")
library("glmmTMB")
# plotting
library("tidyverse")
library("ggplot2")
library("patchwork")
library("RColorBrewer")
theme_set(theme_bw())We downloaded Panama Canal Authority rainfall data for Gamboa and summed rainfall for the dry season (December-April) for both 2019 and 2022.
# getting rainfall data for Gamboa
rain <- read.csv("/Users/Julia/Downloads/ACP_rainfall_D-J/Gamboa_ra.csv", header = T)
rain$date <- as.Date(rain$date, "%d/%m/%Y")
# 2019
sum_dry_ra_19 <- rain %>%
filter(date >= "2018-12-01" & date <= "2019-04-30", chk_note != "missing") %>%
summarize(sum_ra = sum(ra)) # 4
# looking at missing data - missing data for 9 days in December from 5/12 to 13/12
missing_19 <- rain %>%
filter(date >= "2018-12-01" & date <= "2019-04-30", chk_note == "missing")
unique(missing_19$date)
# 2022
sum_dry_ra_22 <- rain %>%
filter(date >= "2021-12-01" & date <= "2022-04-30") %>%
summarize(sum_ra = sum(ra)) # 137
# looking at missing data - missing data for 9 days in December from 5/12 to 13/12 (same dates and times)
missing_22 <- rain %>%
filter(date >= "2018-12-01" & date <= "2019-04-30", chk_note == "missing")
unique(missing_22$date)Here we build a climate PCA figure and distribution maps for C. allenii and C. villosissimus, building from code and data originally assembled by Dena Grossenbacher and Oscar Vargas for: Vargas, Oscar M., et al. “Patterns of speciation are similar across mountainous and lowland regions for a Neotropical plant radiation (Costaceae: Costus).” Evolution 74.12 (2020): 2644-2661.
# code silenced because of long run time
# all mapping code modified from:
library(dismo) #raster manipulation
library(ecospat) # climate pca and niche equivalency
library(factoextra) #biplot
library(distances) # euclidean distances
library(ContourFunctions) #make title with multiple colors
library(weights) # weighted chi squared
library(ade4) # PCA
#import climate raster stack (created using getclimateraster.R; stacked in this order: bio1,bio4,bio12,bio15)
envs = stack("data/envs.tif")
###########################################
# Projecting occurrence data
###########################################
#import occurrence data
occurrence_data <- read.csv("data/cleaned.occurrences.csv")
# Project to Coordinate system
occurrence_data.spdf <- SpatialPointsDataFrame(data.frame(occurrence_data$decimalLongitude,
occurrence_data$decimalLatitude),
data=occurrence_data,
proj4string=CRS("+proj=aea +lat_1=-5 +lat_2=-42 +lat_0=-32 +lon_0=-60 +x_0=0 +y_0=0 +ellps=aust_SA +units=m +no_defs"))
# Create new data frame with your new coordinates
new_coordinates <- data.frame(species = occurrence_data[,c("acceptedScientificName")], #May need to change column number
coordinates(occurrence_data.spdf))
# Make a SpatialPointDataFrame for all occurrences
all.spdf <- SpatialPointsDataFrame(data.frame(new_coordinates$occurrence_data.decimalLongitude,
new_coordinates$occurrence_data.decimalLatitude),
data=new_coordinates,
proj4string=CRS("+proj=aea +lat_1=-5 +lat_2=-42 +lat_0=-32 +lon_0=-60 +x_0=0 +y_0=0 +ellps=aust_SA +units=m +no_defs"))
# plot to make sure everything is working
#plot(envs[[1]])
#points(all.spdf)
###########################################
# Extract climate data for background occurrences
###########################################
# Create a RasterLayer with the extent of occurrences
r <- raster(envs)
# Set the resolution of the cells to 0.01 latitudinal degree (ca. 1x1km)
res(r) <- 0.01
# Expand the extent of the RasterLayer a little
r <- extend(r, extent(r)+1)
# Sample all species occurrences to get background grid cells
# Sample each grid cell only once
background.occ <-gridSample(all.spdf, r, n=1)
df.background.occ <- data.frame(background.occ)
# Extract bioclim variables for background grid cells
# Note that this extraction yields ~2% NAs that vary slightly each time this is run
my.background.values <- data.frame(extract(envs,background.occ))
my.background.values$acceptedScientificName <- "background"
nrow(my.background.values)
my.background.values.noNA <- na.omit(my.background.values) #drop points with NA
nrow(my.background.values.noNA)
df.downsampled <- my.background.values.noNA
###########################################
# Extract climate data for each species' occurrences
###########################################
# for each species, sample each occupied grid cell only once
sp <- sort(unique(as.character(occurrence_data$acceptedScientificName)))
for (i in 1:length(sp)) {
mysp = sp[i]
my.sp.occ <- gridSample(subset(all.spdf, species==mysp), r, n=1)
my.sp.occ <- data.frame(my.sp.occ)
# Extract bioclim variables for species grid cells
# Note that this extraction yields ~2% NAs that vary slightly each time this is run
my.values <- extract(envs,my.sp.occ)
df.my.values = data.frame(my.values)
df.my.values$acceptedScientificName <- mysp
head(df.my.values)
df.my.values <- na.omit(df.my.values) #drop points with NA
df.downsampled <- rbind(df.my.values,df.downsampled)
}
colnames(df.downsampled) <- c("bio1","bio4","bio12","bio15", "acceptedScientificName")
#######################################################
###### Principal components analysis
#######################################################
# note that dudi.pca produces slightly different absolute scores each time it is executed
df.background = subset(df.downsampled, acceptedScientificName=="background")
nrow(df.background) # number of background points used to construct the PCA
genus.df = subset(df.downsampled, !acceptedScientificName=="background")
#use background points to make PCA environment
w<-c(rep(0,nrow(genus.df)),rep(1,nrow(df.background))) #vector of weight, 0 for the occurences, 1 for the background points
pca.cal <- dudi.pca(df.downsampled[,c("bio1","bio12","bio4","bio15")], row.w = w, center = TRUE, scale = TRUE, scannf = FALSE, nf = 2)
#save(output/pca.cal, file="pca.cal")
#bind PC scores to df.downsampled
df.downsampled=cbind(df.downsampled,pca.cal$li)
#background points
df.background = subset(df.downsampled, acceptedScientificName=="background")
##########################################
# Get species mean climate values and number of grid cells occupied
##########################################
#get species mean values and save
spsum <- aggregate(list(df.downsampled[,c("bio1","bio4","bio12","bio15","Axis1","Axis2")]), by = list(df.downsampled$acceptedScientificName), mean)
names(spsum)[names(spsum) == 'Group.1'] <- 'acceptedScientificName'
#get number of grid cells occupied for each species
df.ngrids <- as.data.frame(table(df.downsampled$acceptedScientificName))
colnames(df.ngrids) <- c("acceptedScientificName", "n")
spsum <- merge(spsum,df.ngrids, by="acceptedScientificName")
#write.csv(spsum, "output/species.clim.means.csv",row.names=FALSE)
#then modified by hand to add species names as they appear in the phylogeny and region (mountain influenced vs amazonian)
##########################################
# Make climate PC1xPC2 graphs for all species pairs
##########################################
#import basic info
sis.sum <- read.csv("data/julia_pairs.csv")
#library(colorblindcheck)
#palette_check(c("#cc79a7", "#56B4E9","#FFD700", "#50be73"), plot = TRUE)
#plot
#pdf("figures/Figure_VillClade.pdf", width=6,height=6)
for (i in 1:nrow(sis.sum)) {
plot(df.downsampled$Axis1,df.downsampled$Axis2, xlab="climate axis 1", ylab="climate axis 2", pch=20, col="lightgray")
abline(h=0, col="darkgray"); abline(v=0, col="darkgray")
sis.b = subset(df.downsampled, acceptedScientificName %in% sis.sum$b.geo[i])
sis.a = subset(df.downsampled, acceptedScientificName %in% sis.sum$a.geo[i])
sis.c = subset(df.downsampled, acceptedScientificName %in% sis.sum$c.geo[i])
sis.d = subset(df.downsampled, acceptedScientificName %in% sis.sum$d.geo[i])
points(sis.d$Axis1,sis.d$Axis2, col="#50be73", pch=1, lwd=2) #lasius green
points(sis.b$Axis1,sis.b$Axis2, col="#56B4E9", pch=1, lwd=2) #bracteatus blue
points(sis.a$Axis1,sis.a$Axis2, col="#cc79a7", pch=1, lwd=2) #allenii pink
points(sis.c$Axis1,sis.c$Axis2, col="#FFD700", pch=1, lwd=2) #vill yellow
#draw connector line for euclidean distance
#segments(mean(sis.a$Axis1),mean(sis.a$Axis2), mean(sis.b$Axis1), mean(sis.b$Axis2), lwd=2, col="green")
#multicolor.title(c(as.character(sis.sum$a.label[i])," - ",as.character(sis.sum$b.label[i])),c("black","darkgray", "red"))
legend("bottomleft",
legend = c("C. lasius", "C. bracteatus", "C. allenii", "C. villosissimus"),
col = c("#009E73", "#56B4E9", "#cc79a7", "#FFD700"),
pch = 1,
bty = "n",
pt.cex = 1.5,
cex = 1,
text.col = "black",
text.font=c(3),
horiz = F ,
inset = c(0.05, 0.05))
}
#dev.off()# code silenced because of long run time
##########################################
# Make range maps for C. allenii and C. villosissimus
##########################################
setwd("/Users/Julia/Library/CloudStorage/GoogleDrive-jharenca@ucsc.edu/My Drive/Costus/Genomes/distribution\ maps")
library(rgdal) # read OGR vector maps into Spatial objects
library(elevatr) # get raster elevation
library(raster) # creating rasters
library(maps) # geographic maps
library(mapproj) # apply a map projection
df.occur <- read.csv(file = "cleaned.occurrences.csv", as.is=TRUE, sep=",")
lon.low <- -1+min(df.occur$decimalLongitude)
lon.high <- 1+max(df.occur$decimalLongitude)
lat.low <- -1+min(df.occur$decimalLatitude)
lat.high <- 1+max(df.occur$decimalLatitude)
# Generate a data frame of lat/long coordinates.
ex.df <- data.frame(x=seq(from=lon.low, to=lon.high, length.out=10),
y=seq(from=lat.low, to=lat.high, length.out=10))
# Specify projection.
prj_dd <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
# Use elevatr package to get elevation data for each point.
elev <- get_elev_raster(ex.df, prj = prj_dd, z = 5, clip = "bbox")
#crop to just land areas
path.data <- "worldboarders"
world_borders <- readOGR(dsn = path.data, layer = "TM_WORLD_BORDERS-0.3")
projection(world_borders) <- CRS(prj_dd)
# Crop elevation data by extent of world boarders
elev <- crop(elev, extent(world_borders))
#identify pixels of elevation raster that lie within the borders of land
elev <- mask(elev, world_borders)
# subset just the desired occurance data for C. villosissimus anc C. allenii
vill.occur <- df.occur %>% filter(acceptedScientificName == "Costus villosissimus Jacq.")
alle.occur <- df.occur %>% filter(acceptedScientificName == "Costus allenii Maas")
### define extent of map
lon.low= -1+min(vill.occur$decimalLongitude)
lon.high= 1+max(vill.occur$decimalLongitude)
lat.low= -1+min(vill.occur$decimalLatitude)
lat.high= 1+max(vill.occur$decimalLatitude)
pdf(file="figures/Range_Map2.pdf", width=8,height=6)
map("world", resolution=0, wrap=T, mar=c(2,2,2,0),xlim=c(lon.low, lon.high),ylim=c(lat.low, lat.high), main="test", col="grey")
vill <- mapproject(vill.occur$decimalLongitude, vill.occur$decimalLatitude, proj="")
alle <- mapproject(alle.occur$decimalLongitude, alle.occur$decimalLatitude, proj="")
plot(elev, col = gray.colors(25, start = 0.5, end = 1, gamma = 1, alpha = 1), add = TRUE, legend=F)
points(vill, col="#FFD700", cex=.8, pch=19)
points(alle, col="#cc79a7", cex=.8, pch=19)
# points(vill, bg="#FFD700", cex=1, pch=21, col = "black")
# points(alle, bg="#cc79a7", cex=1, pch=21, col = "black")
dev.off()We gathered leaf toughness data from wild plants of both species (18 C. allenii and 20 C. villosissimus) using a leaf penetrometer, which measures the pressure required to poke a hole in the leaf. Here we evaluate whether there is a difference in leaf toughness between species.
First, we read in and plot the data:
# read in the data
tough <- read.csv("data/leaf.toughness.csv", header = TRUE)
# Plot leaf toughness
ggplot(data=tough, aes(x=species, y=ave)) +
geom_boxplot(outlier.shape=NA, aes(fill = species)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
geom_jitter(alpha=0.6, width=0.15) +
theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
xlab("") +
ylab("leaf toughness (g)")+
scale_x_discrete(labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='vill', y=198, aes(label = "p<0.001"), size=2.5)#ggsave("figures/leaf_toughness_plot.png", device = "png", width = 10, height = 7, units = "cm")Next, we evaluate the data, find that it best fits a lognormal distribution, and run both a nonparametric test and t-test on log transformed data.
# subset the data to evaluate alle and vill data distributions separately
alle.tough <- tough[tough$species =="alle",] # subsetting alle toughness data
vill.tough <- tough[tough$species =="vill",] # subsetting vill toughness data
shapiro.test(alle.tough$ave) # not normally distributed (tough leaves spike)
Shapiro-Wilk normality test
data: alle.tough$ave
W = 0.85313, p-value = 0.009493
hist(alle.tough$ave, breaks=20)shapiro.test(vill.tough$ave) # not normally distributed, but sorta close; p-value = 0.0765
Shapiro-Wilk normality test
data: vill.tough$ave
W = 0.91415, p-value = 0.0765
hist(vill.tough$ave, breaks=20)# QQ plots of different fits
qqp(alle.tough$ave, "norm")[1] 1 18
qqp(alle.tough$ave, "lnorm") #best [1] 1 18
shapiro.test(log(alle.tough$ave)) # marginal p-value = 0.06202
Shapiro-Wilk normality test
data: log(alle.tough$ave)
W = 0.9019, p-value = 0.06202
qqp(vill.tough$ave, "norm")[1] 20 19
qqp(vill.tough$ave, "lnorm") #best [1] 20 19
shapiro.test(log(vill.tough$ave)) # could be normal (not not normal)
Shapiro-Wilk normality test
data: log(vill.tough$ave)
W = 0.97175, p-value = 0.7912
wilcox.test(tough$ave ~ tough$species, paired = F) #W = 294, p-value = 0.000566
Wilcoxon rank sum exact test
data: tough$ave by tough$species
W = 294, p-value = 0.000566
alternative hypothesis: true location shift is not equal to 0
t.test(log(tough$ave) ~ tough$species, paired = F) # t = 3.6241, df = 33.24, p-value = 0.0009574
Welch Two Sample t-test
data: log(tough$ave) by tough$species
t = 3.6241, df = 33.24, p-value = 0.0009574
alternative hypothesis: true difference in means between group alle and group vill is not equal to 0
95 percent confidence interval:
0.1166056 0.4148987
sample estimates:
mean in group alle mean in group vill
4.907907 4.642155
# getting un-transformed averages
aggregate(tough$ave, list(tough$species), FUN=mean) Group.1 x
1 alle 139.3889
2 vill 105.9000
We have chemistry data for oxidative capacity and chemical concentrations for each of three compound classes: phenolics, flavonoids, and steroidal saponins (aka steroidal glycosides). Here, we compare chemical concentrations and oxidative capacity between species.
First, we read in and plot the data:
# read in data and adjust column headers
chem <- read.csv('data/Full_chem_data.csv', header = T)
names(chem) <- c("Accession", "Species", "DPPH", "Oxidative.Capacity",
"Concentration.of.Phenolics", "Concentration.of.Flavonols",
"Concentration.of.NF.Phenolics", "Concentration.of.Saponins")
# Plot Chemistry data
# Saponins
S <- ggplot(chem, aes(x=Species, y=Concentration.of.Saponins)) +
geom_boxplot(outlier.shape=NA, aes(fill = Species)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species") +
#ggtitle("Saponins") +
geom_jitter(alpha=0.6, width=0.15) +
guides(fill="none") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("") +
ylab("Concentration of Saponins") +
#coord_cartesian(ylim = c(0, 1)) +
scale_x_discrete(labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='VILL', y=0.75, aes(label = "p<0.001"), size=3.5) + # y=0.99 for limited y
theme(axis.text = element_text(size = 12))
# Non-flavonoid Phenolics
P <- ggplot(chem, aes(x=Species, y=Concentration.of.NF.Phenolics)) +
geom_boxplot(outlier.shape=NA, aes(fill = Species)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species") +
#ggtitle("Non-Flavonoid Phenolics") +
geom_jitter(alpha=0.6, width=0.15) +
guides(fill="none") +
theme(plot.title = element_text(hjust = 0.5)) +
xlab("") +
ylab("Concentration of non-Flavonoid Phenolics") +
#coord_cartesian(ylim = c(0, 1)) +
scale_x_discrete(labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='VILL', y=0.8, aes(label = "p<0.01"), size=3.5) + # y=0.99 for limited y
theme(axis.text = element_text(size = 12))
# Flavonids
Fl <- ggplot(chem, aes(x=Species, y=Concentration.of.Flavonols)) +
geom_boxplot(outlier.shape=NA, aes(fill = Species)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
#ggtitle("Flavonoids") +
geom_jitter(alpha=0.6, width=0.15) +
theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
xlab("") +
ylab("Concentration of Flavonoids") +
#coord_cartesian(ylim = c(0, 0.3)) +
guides(fill="none") +
scale_x_discrete(labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='ALLE', y=0.13, aes(label = "p<0.01"), size=3.5) + # y=0.299 for limited y and x='VILL'
theme(axis.text = element_text(size = 12))
# Oxidative capacity
OC <- ggplot(chem, aes(x=Species, y=Oxidative.Capacity)) +
geom_boxplot(outlier.shape=NA, aes(fill = Species)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
#ggtitle("Oxidative Capacity") +
geom_jitter(alpha=0.6, width=0.15) +
theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
xlab("") +
ylab("Oxidative Capacity (1-DPPH)") +
#coord_cartesian(ylim = c(0, 2.1)) +
guides(fill="none") +
scale_x_discrete(labels=c("ALLE" = expression(italic("C. allenii")),
"VILL" = expression(italic("C. villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='VILL', y=2.0, aes(label = "p<0.01"), size=3.5) + # y=2.099 for limited y
theme(axis.text = element_text(size = 12))
# Joining the plots with patchwork
P + OC + S + Fl + plot_annotation(tag_levels = 'A', tag_suffix = ')') &
theme(plot.tag.position = c(0, 1),
plot.tag = element_text(size = 11.5, hjust = 0, vjust = 0))##ggsave("Chemistry/Chemistry_plot", device = "png", width = 24, height = 20, units = "cm")
#ggsave("figures/Chemistry_plot_free_scale.png", device = "png", width = 18, height = 19, units = "cm")Next, we conduct t-tests comparing values between species and correct for multiple tests.
# assessing normality
# dividing the data by speceis to assess within species normality
# within group normality is the assumption of an unpaired students/welches t-test
alle_chem <- chem %>% filter(Species=="ALLE")
vill_chem <- chem %>% filter(Species=="VILL")
# # non-Flavonoid Phenolics
# shapiro.test(alle_chem$Concentration.of.NF.Phenolics) # W = 0.9032, p-value = 0.3508
# qqp(alle_chem$Concentration.of.NF.Phenolics, "norm")
# shapiro.test(vill_chem$Concentration.of.NF.Phenolics) # W = 0.94897, p-value = 0.7204
# qqp(vill_chem$Concentration.of.Phenolics_nf, "norm")
#
# # Flavonoids
# shapiro.test(alle_chem$Concentration.of.Flavonols) # W = 0.96981, p-value = 0.8971
# qqp(alle_chem$Concentration.of.Flavonols, "norm")
# shapiro.test(vill_chem$Concentration.of.Flavonols) # W = 0.90093, p-value = 0.3367
# qqp(vill_chem$Concentration.of.Flavonols, "norm")
#
# # Saponins
# shapiro.test(alle_chem$Concentration.of.Saponins) # W = 0.86689, p-value = 0.1743
# qqp(alle_chem$Concentration.of.Saponins, "norm")
# shapiro.test(vill_chem$Concentration.of.Saponins) # W = 0.85908, p-value = 0.1486
# qqp(vill_chem$Concentration.of.Saponins, "norm")
#
# # Oxidative Capacity
# shapiro.test(alle_chem$Oxidative.Capacity) # W = 0.74073, p-value = 0.01014
# qqp(alle_chem$Oxidative.Capacity, "norm") # NOT normal
# hist(alle_chem$Oxidative.Capacity, breaks = 20)
# shapiro.test(vill_chem$Oxidative.Capacity) # W = 0.86363, p-value = 0.1631
# qqp(vill_chem$Oxidative.Capacity, "norm")
# hist(vill_chem$Oxidative.Capacity, breaks = 20)
# Stats
# Phenolics
t.test(chem$Concentration.of.NF.Phenolics~chem$Species, alternative = "two.sided")
Welch Two Sample t-test
data: chem$Concentration.of.NF.Phenolics by chem$Species
t = 5.0247, df = 6.4714, p-value = 0.001919
alternative hypothesis: true difference in means between group ALLE and group VILL is not equal to 0
95 percent confidence interval:
0.1997338 0.5662662
sample estimates:
mean in group ALLE mean in group VILL
0.5228095 0.1398095
# wilcox.test(chem$Concentration.of.Phenolics_nf~chem$Species) # qualitatively equivalent to t.test
# t = 5.0247, df = 6.4714, p-value = 0.001919
# Flavonids
t.test(chem$Concentration.of.Flavonols~chem$Species)
Welch Two Sample t-test
data: chem$Concentration.of.Flavonols by chem$Species
t = -3.9658, df = 7.7076, p-value = 0.004465
alternative hypothesis: true difference in means between group ALLE and group VILL is not equal to 0
95 percent confidence interval:
-0.04589939 -0.01200537
sample estimates:
mean in group ALLE mean in group VILL
0.0800000 0.1089524
# wilcox.test(chem$Concentration.of.Flavonoids~chem$Species) # qualitatively equivalent to t.test
# t = -3.9658, df = 7.7076, p-value = 0.004465
# Saponins
t.test(chem$Concentration.of.Saponins~chem$Species)
Welch Two Sample t-test
data: chem$Concentration.of.Saponins by chem$Species
t = 5.5942, df = 11.886, p-value = 0.0001214
alternative hypothesis: true difference in means between group ALLE and group VILL is not equal to 0
95 percent confidence interval:
0.1483432 0.3379426
sample estimates:
mean in group ALLE mean in group VILL
0.6570952 0.4139524
# wilcox.test(chem$Concentration.of.Saponins~chem$Species) # qualitatively equivalent to t.test
# t = 5.5942, df = 11.886, p-value = 0.0001214
# Oxidative activity
t.test(chem$Oxidative.Capacity~chem$Species)
Welch Two Sample t-test
data: chem$Oxidative.Capacity by chem$Species
t = 4.5951, df = 6.8692, p-value = 0.002624
alternative hypothesis: true difference in means between group ALLE and group VILL is not equal to 0
95 percent confidence interval:
0.1444690 0.4532453
sample estimates:
mean in group ALLE mean in group VILL
2.015143 1.716286
# wilcox.test(chem$Oxidative.Capacity~chem$Species) # W = 49, p-value = 0.0005828 ; stronger result, but both <0.01.
# t = 4.5951, df = 6.8692, p-value = 0.002624
# multiple test correction
p <- c(0.001919, 0.004465, 0.0001214, 0.002624)
p.adjust(p, method = "BH")[1] 0.003498667 0.004465000 0.000485600 0.003498667
# 0.003498667 0.004465000 0.000485600 0.003498667We see that, even with test correction for the four t-tests, chemical concentrations and oxidative capacity are all statistically different between species (p<0.01).
Analyses comparing wild herbivory between Costus villosissimus and C. allenii.
Analysis comparing the presence or absence of herbivory on wild plants from both 2019 and 2022.
First, we read in the 2019 and 2022 data, create presence/absence columns for each, and combine them into a single dataframe.
# 2019 data
Wherb19 <- read.csv("data/vill-all_RLB-herbivory_data_2019-Herbivory.csv", header=TRUE)
# create presence/absence of herbivory column
Wherb19 <- Wherb19 %>% mutate(pa = case_when(
per_herbivory == 0 ~ 0,
per_herbivory != 0 ~ 1
))
# 2020 data
Wherb22 <- read.csv("data/2022_wild_herbivory.csv", header = T)
# create presence/absence of herbivory column
Wherb22 <- Wherb22 %>%
mutate(pa = case_when(
num_w_RLBH == 0 ~ 0,
num_w_RLBH != 0 ~ 1
)) %>% dplyr::select(date, spp, id, pa)
# making combined 2019/2022 dataset
Wherb19$year <- '2019'
Wherb22$year <- '2022'
names(Wherb19) <- c("date","ID", "spp", "ave_per_herb", "pa", "year")
Wherb_full <- Wherb22 %>%
dplyr::select(spp,pa,year) %>%
full_join((Wherb19 %>% dplyr::select( "spp", "pa", "year")),.) Next, we evaluate the potential impacts of plant species and year on the presence vs absence of herbivory with a binomial regression. We first check to ensure the model is not under- or over-dispersed.
# binomial regression
bin_reg <- glm(Wherb_full$pa ~ Wherb_full$spp + Wherb_full$year, family = "binomial")
# Check over or under-dispersion with the package DHARMA.
simOut <- simulateResiduals(bin_reg)
plot(simOut) # dispersion looks goodtestDispersion(simOut) # dispersion = 1.0241, p-value = 0.808; not overdispersed
DHARMa nonparametric dispersion test via sd of residuals fitted vs.
simulated
data: simulationOutput
dispersion = 1.0241, p-value = 0.808
alternative hypothesis: two.sided
The model is not overdispersed! :D
Now we look at the results of the regression and plot wild herbivory by year and species.
# binomial regression summary
summary(bin_reg)
Call:
glm(formula = Wherb_full$pa ~ Wherb_full$spp + Wherb_full$year,
family = "binomial")
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4029 -0.5089 0.3387 0.3387 2.0535
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.5860 0.2836 2.066 0.0388 *
Wherb_full$sppvill -4.8085 0.5409 -8.890 < 2e-16 ***
Wherb_full$year2022 2.2435 0.4931 4.550 5.37e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 347.06 on 250 degrees of freedom
Residual deviance: 160.85 on 248 degrees of freedom
AIC: 166.85
Number of Fisher Scoring iterations: 6
# # code for choosing colorblind friendly colors.
# library(colorblindcheck)
# palette_check(c("#cc79a7", "#FFD700"), plot = TRUE)
# Making a simple barplot of presence/absence herbivory data from both years
# calculate percentages for plot
PA_barplot <- Wherb_full %>% group_by(spp, year) %>% summarise(present = sum(pa), N = n()) %>% mutate( percent = present/N*100)
PA_barplot$label <- c("", "", "0", "")
PA_barplot$pval <- c("", "", "", "p<<0.001")
# create plot
WH_PA <- ggplot(data=PA_barplot, aes(x=year, y=percent, fill=spp)) +
geom_bar(stat="identity", position = "dodge") +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
ggtitle("Natural herbivory") +
ylim(0, 100) +
xlab("") +
#theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
ylab('% of individuals with herbivory') +
geom_text(aes(x=year, y=percent+4, label = label, group = spp), size = 3.5, position = position_dodge(width = 0.9)) +
geom_text(aes(x=year, y=percent+86.5, label = pval, group = spp), size = 3.5, position = position_dodge(width = 0.9), hjust = 0.75) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
theme(axis.text = element_text(size = 10)) +
theme(legend.text.align = 0)
# # save plot p << 0.001
# ggsave("figures/wild_herbivory_plot.png", H_PA, device = "png", width = 10, height = 7, units = "cm")We see that both species and year are significant predictors of the presence of herbivory at a < 0.00001 level, with greater herbivory on C.allenii and during the wet year, 2022.
Here we compare the amount of herbivory as a percentage of leaf area between species for a random subset of wild plants in 2019 and 2022. First, we plot the data.
# Read in the 2019 data (refresh)
Wherb19 <- read.csv("data/vill-all_RLB-herbivory_data_2019-Herbivory.csv", header=TRUE)
# select random subset for herbivory percent analysis
set.seed(1) # use 1 to match paper
Wherb19_subset <- Wherb19 %>%
group_by(species) %>%
sample_n(10)
# # Non-parametric test of 2019 only
# wilcox.test(Wherb19_subset$per_herbivory ~ Wherb19_subset$species) # W = 90, p-value = 0.0007512
# ## MEANS: allenii - 2.54; vill - 0.00
# Read in the 2022 data
Wherb22_subset <- read.csv("data/2022_Wild_Percent_Herbivory_subset.csv", header = T)
# # Non-parametric test of 2022 data only
# wilcox.test(Wherb22_subset$ave_per_herb ~ Wherb22_subset$spp) # W = 96, p-value = 0.0003801
# ## MEANS: allenii - 3.055 ; vill - 0.1947
# 2019/2022 combined
Wherb19_subset$year <- '2019'
Wherb22_subset$year <- '2022'
names(Wherb19_subset) <- c("date","ID", "spp", "ave_per_herb", "year")
Wherb_full_subset <- Wherb22_subset %>%
dplyr::select(spp,ave_per_herb,year) %>%
full_join((Wherb19_subset %>% dplyr::select( "spp", "ave_per_herb", "year")),.)
# plotting subset data
# plot of herbivory subset data - probably not for inclusion #
WH_subset <- ggplot(data=Wherb_full_subset, aes(x=year, y=ave_per_herb, fill = spp)) +
geom_boxplot(outlier.shape=NA) +
geom_point(alpha=0.6, position=position_jitterdodge(jitter.width=0.15)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
ggtitle("Natural herbivory percent") +
xlab("") +
ylab("% herbivore leaf damage")+
scale_x_discrete(labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
guides(fill="none")
# Print combined plot
WH_PA + WH_subset# ggsave("figures/wild_per_herbivory_plot.png", device = "png", width = 10, height = 8, units = "cm")
# Testing for a difference between years with a nonparametric wilcoxon test
wilcox.test(ave_per_herb ~ year, data = Wherb_full_subset)
Wilcoxon rank sum test with continuity correction
data: ave_per_herb by year
W = 165, p-value = 0.3185
alternative hypothesis: true location shift is not equal to 0
# W = 165, p-value = 0.3185
# Testing for a difference between species with a nonparametric wilcoxon test
wilcox.test(ave_per_herb ~ spp, data = Wherb_full_subset)
Wilcoxon rank sum test with continuity correction
data: ave_per_herb by spp
W = 372, p-value = 7.095e-07
alternative hypothesis: true location shift is not equal to 0
# W = 372, p-value = 7.095e-07Choice trials present beetles with both plant species simultaneously and measure how much they eat of each. In June 2019, and again in May and June 2022, we collected wild beetles from various species of Costus growing along creeks in the areas surrounding Pipeline road and recorded the species on which beetles were found. To ensure trial beetles weren’t biased towards one of our focal species, we did not collect beetles from either C. villosissimus or C. allenii. Beetles did not have access to leaves for 12 hours before we placed them in choice trials. Each beetle was only used in a singel trial before release. To quantify herbivory, we laid a transparency printed with a mm2 grid over the leaf squares and counted mm2 of herbivore damage.
For each trial, we placed beetles in ramekins (with wet paper and perforated lids) with one 1.5 cm2 leaf square of both C. villosissimus and C. allenii and quantified the leaf area eaten for each species after 12 hours. We conducted 13 successful choice trials in 2019 and 22 in 2022 (a trial was considered unsuccessful and removed if the beetle did not eat either leaf square).
Again, we first read in, clean, and plot the data.
# read in the data
btd <- read.csv("data/beetle_trials.csv", header = TRUE)
# subset rows with alle and vill together as trial - aka the choice trials
BCT <- btd[btd$Species.in.trial =="alle/vill",]
# converting character to numeric
BCT$vill.mm2eaten <- as.numeric(BCT$vill.mm2eaten)
BCT$alle.mm2eaten <- as.numeric(BCT$alle.mm2eaten)
# remove trials in which beetles did not eat either spp
BCT.no.0 <- BCT[,1:6] %>% filter(!(vill.mm2eaten == 0 & alle.mm2eaten == 0))
# remove trials in which beetles were collected from alle (focal species)
BCT.no.0 <- BCT.no.0 %>% filter(!(Species.found.on == "alle"))
## plotting
BCT.no.0$beetleID <- 1:nrow(BCT.no.0) # assign each trial (each beetle) an ID before splitting each trial into two rows (one for each species)
# convert to long format
BCT.no.0.long <- BCT.no.0 %>%
pivot_longer(cols = c(vill.mm2eaten, alle.mm2eaten),
names_to = "trial.spp",
values_to = "mm2eaten")
# remove '.mm2eaten' from species column
BCT.no.0.long$trial.spp <- substr(BCT.no.0.long$trial.spp, 1, 4)
# box plot of choice data
choice <- ggplot(BCT.no.0.long, aes(trial.spp, mm2eaten)) +
geom_boxplot(outlier.shape=NA, aes(fill = trial.spp)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
geom_jitter(alpha=0.6, width=0.15) +
ggtitle("Beetle choice trials") +
xlab("") +
ylab(bquote("herbivore damage ("~mm^2~"eaten)")) +
guides(fill="none") +
scale_x_discrete(labels=c("alle" = expression(italic("allenii")),
"vill" = expression(italic(" villosissimus")))) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
geom_text(x='vill', y=42, aes(label = "p=0.013"), size=3.5) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
theme(axis.text = element_text(size = 10))
#ggsave("beetle_choice_plot", device = "png", width = 5, height = 5, units = "cm")
#ggsave("beetle_choice_plot2", device = "png", width = 9, height = 6, units = "cm")Now for the analysis! We first take the difference of mm2 eaten of each species for each beetle/trial to get a single preference value. We then evaluate the impact of a random effect, date, on that preference value.
# Taking the difference of mm2 eaten between species and dividing by total mm2 eaten for each beetle/trial to get a single, standardized preference value. (mm2 vill eaten - mm2 alle eaten).
BCT.no.0 <- BCT.no.0 %>% mutate(preference = (vill.mm2eaten - alle.mm2eaten)/(vill.mm2eaten + alle.mm2eaten))
# assess data distribution
hist(BCT.no.0$preference, breaks=20) # Not normal# transform data to fit a beta distribution:
BCT.no.0 <- BCT.no.0 %>% mutate(preference_beta = (preference + 1)/(1+1),
preference_beta = case_when(
preference_beta == 0 ~ preference_beta + 0.00001,
preference_beta == 1 ~ preference_beta - 0.00001,
TRUE ~ preference_beta))
# beta regression with date as a random effect
library(betareg)
beta <- betareg(preference_beta ~ 1 | date, data = BCT.no.0)
summary(beta)
Call:
betareg(formula = preference_beta ~ 1 | date, data = BCT.no.0)
Standardized weighted residuals 2:
Min 1Q Median 3Q Max
-1.6885 -0.3081 -0.0339 1.0210 1.4172
Coefficients (mean model with logit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.1865 0.1964 0.95 0.342
Phi coefficients (precision model with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.26794 0.50996 -2.486 0.01291 *
date5/27/22 -0.01405 0.77828 -0.018 0.98560
date6/16/22 0.24975 0.88782 0.281 0.77848
date6/18/22 0.22780 0.72354 0.315 0.75288
date6/19/22 -0.42836 0.77426 -0.553 0.58009
date6/21/22 0.32921 0.68742 0.479 0.63201
date7/1/22 -0.35926 1.13199 -0.317 0.75096
date7/11/19 4.77743 1.48359 3.220 0.00128 **
date7/9/19 0.05353 0.58870 0.091 0.92754
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Type of estimator: ML (maximum likelihood)
Log-likelihood: 131.2 on 10 Df
Number of iterations: 21 (BFGS) + 5 (Fisher scoring)
# calculate how much more vill than alle beetles ate on average:
per_greater <- (mean(BCT.no.0$vill.mm2eaten) - mean(BCT.no.0$alle.mm2eaten))/( mean(BCT.no.0$alle.mm2eaten))*100 # ~39%Herbivory arrays consisted of a potted plant of each species (C. villosissimus and C. allenii) placed near a wild plant of a third species (C. scaber) that was colonized by at least one roleld leaf beetle. Arrays were placed in intermediate habitat along a creek on the seasonally dry side of the panamanian isthmus. It was not possible to accurately quantify the area eaten (area of beetle damage) after plants were in the field for 3 weeks, so we instead evaluate the proportion of healthy stems that experienced herbivory. (analysis of best guess estimates of area yield qualitatively the same result)
Data importing, cleaning, and plotting:
array <- read.csv("data/Herbivory_arrays_220703.csv", header = T)
names(array) <- c("ID", "array_num", "alt_ID","spp", "site", "date", "herbivory_yn",
"num_RLs", "num_stms_herb", "lf1_herb", "lf1_area", "lf2_herb", "lf2_area",
"lf3_herb", "lf3_area", "lf4_herb", "lf4_area", "lf5.herb", "lf5_area",
"lf6.herbivory", "lf6.area", "lf7.herbivory", "lf7.area", "lf8.herbivory",
"lf8.area", "lf9.herbivory", "lf9.area", "lf10.herbivory", "lf10.area",
"lf11.herbivory", "lf11.area", "notes")
### NEED TO FIX! should just convert herbivory yn column to 1 and 0 directly rather than stupidly from proportion also calculated below.
# Filter to only include census dates, select relevant cols, and convert herbivory Y and N to 1 and 0 for binomial regression.
array <- array %>%
dplyr::select(ID, array_num, spp, site, date, herbivory_yn, num_stms_herb) %>% # choose only certain columns to include
mutate(herbivory_yn = ifelse(herbivory_yn == "y", 1, 0)) %>%
filter(!is.na(num_stms_herb)) # remove rows with no data in the 'num_stms_herb' column
# # plot
# PA_barplot <- Wherb_full %>% group_by(spp, year) %>% summarise(present = sum(pa), N = n()) %>% mutate( percent = present/N*100)
# ggsave("arrays_lf_proportion_plot.png", device = "png", width = 6, height = 8, units = "cm")Analysis of herbivory presence absence data: First, we evaluated the impact of microsite by fitting a binomial regression model with species as a fixed effect and site as a random effect. Site explained zero variability in the data, so we conducted a Pearson’s Chi-squared test with Yates’ continuity correction.
# Analysis
# binomial regression
array_bin_reg <- glmer(herbivory_yn ~ spp + (1 | site),
data = array,
family = "binomial")
summary(array_bin_reg)Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: herbivory_yn ~ spp + (1 | site)
Data: array
AIC BIC logLik deviance df.resid
48.2 54.0 -21.1 42.2 49
Scaled residuals:
Min 1Q Median 3Q Max
-3.4641 0.2887 0.2887 0.5477 0.5477
Random effects:
Groups Name Variance Std.Dev.
site (Intercept) 0 0
Number of obs: 52, groups: site, 11
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.4849 0.7360 3.376 0.000735 ***
sppvill -1.2809 0.8708 -1.471 0.141306
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
sppvill -0.845
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
# Check over or under-dispersion with the package DHARMA.
simOut <- simulateResiduals(array_bin_reg)
plot(simOut) # looks good# Site explains 0 variance, so we conduct a Chi squared test with Yates continuity correction instead:
# create a contingency table of spp and pa
tab <- table(array$spp, array$herbivory_yn)
# perform a chi-squared test
chisq.test(tab) # X-squared = 1.3295, df = 1, p-value = 0.2489
Pearson's Chi-squared test with Yates' continuity correction
data: tab
X-squared = 1.3295, df = 1, p-value = 0.2489
Analysis of subset with reasonable percent herbivory estimates: We fit a generalized linear mixed model with log-transformed area of herbivore damage as the response, species as a fixed effect, and microsite as a random effect. We compared this to a model with no random effects with AIC values and a log-likelihood ratio test, which both supported the simpler model. Therefore, we assessed whether herbivory varied between species with a paired Welch’s two-sample t-test. The difference in herbivory between species in each array was not apparently normal, but nonparametric Wilcoxon rank-sum test results did not differ from the t-test, so only t-test results are reported here.
# evaluating the subset of arrays with decent percent herbivory data
all_ests <- read.csv('data/array_subset_all_ests.csv', header = T)
names(all_ests) <- c("ID", "array_num", "alt_ID","spp", "site", "date", "herbivory_yn",
"num_RLs", "num_stms_herb", "per_herbiv", "total_herbiv", "lf1_herb", "lf1_area", "lf2_herb", "lf2_area",
"lf3_herb", "lf3_area", "lf4_herb", "lf4_area", "lf5.herb", "lf5_area",
"lf6.herbivory", "lf6.area", "lf7.herbivory", "lf7.area", "lf8.herbivory",
"lf8.area", "lf9.herbivory", "lf9.area", "lf10.herbivory", "lf10.area",
"lf11.herbivory", "lf11.area", "notes")
# data structuring and cleaning
all_ests <- all_ests %>% dplyr::select(array_num, spp, site, date, herbivory_yn, per_herbiv, total_herbiv) %>% filter(per_herbiv != 'na')
all_ests$per_herbiv <- as.numeric(all_ests$per_herbiv)
all_ests$total_herbiv <- as.numeric(all_ests$total_herbiv)
# Plot
array_est <- ggplot(all_ests, aes(spp, total_herbiv)) +
geom_boxplot(outlier.shape=NA, aes(fill = spp)) +
scale_fill_manual(values = c("#cc79a7", "#FFD700"), name = "Species",
labels=c("alle" = expression(italic("C. allenii")),
"vill" = expression(italic("C. villosissimus")))) +
ggtitle("Herbivory arrays") +
geom_jitter(alpha=0.6, width=0.15) +
#theme(plot.title = element_text(hjust = 0.5), legend.text.align = 0) +
xlab("") +
ylab(bquote("herbivore damage ("~mm^2~"eaten)")) +
guides(fill="none") +
scale_x_discrete(labels=c("alle" = expression(italic("allenii")),
"vill" = expression(italic(" villosissimus")))) +
geom_text(x='vill', y=2650, aes(label = "N.S."), size=3.5, hjust =-0.06) +
theme(axis.text.x = element_text(hjust = 0.6, colour = "black")) +
theme(axis.text = element_text(size = 10))
# Full model: Regression of total herbivory
lm_log <- glmer((total_herbiv+1) ~ spp + (1 | array_num) + (1 | site),
data = all_ests,
family=gaussian(link = "log"))
summary(lm_log) # sppvill 1.761 4.198 0.419Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: gaussian ( log )
Formula: (total_herbiv + 1) ~ spp + (1 | array_num) + (1 | site)
Data: all_ests
AIC BIC logLik deviance df.resid
1041.8 1051.4 -515.9 1031.8 45
Scaled residuals:
Min 1Q Median 3Q Max
-2.8165 -0.4460 -0.0026 0.4762 2.9772
Random effects:
Groups Name Variance Std.Dev.
array_num (Intercept) 61927 248.9
site (Intercept) 48399 220.0
Residual 70221 265.0
Number of obs: 50, groups: array_num, 26; site, 11
Fixed effects:
Estimate Std. Error t value Pr(>|z|)
(Intercept) 5.2531 83.3211 0.063 0.950
sppvill 0.0839 0.1185 0.708 0.479
Correlation of Fixed Effects:
(Intr)
sppvill -0.001
optimizer (Nelder_Mead) convergence code: 0 (OK)
unable to evaluate scaled gradient
Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Check model fit
simOut <- simulateResiduals(lm_log)
plot(simOut) # funky, but passes the tests# Compare nested models:
lm_log2 <- glmer((total_herbiv+1) ~ spp + (1 | site),
data = all_ests,
family=gaussian(link = "log"))
summary(lm_log2)Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: gaussian ( log )
Formula: (total_herbiv + 1) ~ spp + (1 | site)
Data: all_ests
AIC BIC logLik deviance df.resid
890.1 897.8 -441.1 882.1 46
Scaled residuals:
Min 1Q Median 3Q Max
-3.1581 -0.3135 -0.1366 0.2062 3.5882
Random effects:
Groups Name Variance Std.Dev.
site (Intercept) 96779 311.1
Residual 144590 380.2
Number of obs: 50, groups: site, 11
Fixed effects:
Estimate Std. Error t value Pr(>|z|)
(Intercept) 5.608099 0.246685 22.73 <2e-16 ***
sppvill -0.189174 0.003456 -54.74 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
sppvill -0.007
optimizer (Nelder_Mead) convergence code: 0 (OK)
Model failed to converge with max|grad| = 0.0158589 (tol = 0.002, component 1)
lm_log3 <- glm((total_herbiv+1) ~ spp,
data = all_ests,
family=gaussian(link = "log"))
summary(lm_log3)
Call:
glm(formula = (total_herbiv + 1) ~ spp, family = gaussian(link = "log"),
data = all_ests)
Deviance Residuals:
Min 1Q Median 3Q Max
-448.48 -318.48 -198.38 -15.53 2324.72
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.1081 0.2546 23.988 <2e-16 ***
sppvill -0.2522 0.4150 -0.608 0.546
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 327492)
Null deviance: 15845112 on 49 degrees of freedom
Residual deviance: 15719611 on 48 degrees of freedom
AIC: 780.81
Number of Fisher Scoring iterations: 7
AIC1 <- AIC(lm_log)
AIC2 <- AIC(lm_log2)
AIC3 <- AIC(lm_log3)
AIC2-AIC3 # 109 - very positive, 3 is better than 2 [1] 109.324
AIC1-AIC2 # 151.6804 - very positive, 2 is better than 1[1] 151.6804
anova(lm_log, lm_log2, lm_log3) # AIC for simplest model way higher than others, same with log likelihoodData: all_ests
Models:
lm_log3: (total_herbiv + 1) ~ spp
lm_log2: (total_herbiv + 1) ~ spp + (1 | site)
lm_log: (total_herbiv + 1) ~ spp + (1 | array_num) + (1 | site)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
lm_log3 3 780.81 786.55 -387.41 774.81
lm_log2 4 890.14 897.79 -441.07 882.14 0 1 1
lm_log 5 1041.82 1051.38 -515.91 1031.82 0 1 1
# paired analysis of total herbivory
# percent herbivory in long format for paired t-test
W_tot_herbiv <- all_ests %>% pivot_wider(., id_cols=array_num, names_from = spp, values_from = total_herbiv)
# assessing whether difference is normally distributed for use of paired t-test
shapiro.test(W_tot_herbiv$alle-W_tot_herbiv$vill) # p-value = 0.06014 ; normalish
Shapiro-Wilk normality test
data: W_tot_herbiv$alle - W_tot_herbiv$vill
W = 0.92057, p-value = 0.06014
hist(W_tot_herbiv$alle-W_tot_herbiv$vill, breaks =20)qqp(W_tot_herbiv$alle-W_tot_herbiv$vill, "norm")[1] 21 4
# paired t-test
t.test(W_tot_herbiv$alle, W_tot_herbiv$vill, paired = T) # t = 0.77779, df = 23, p-value = 0.4446
Paired t-test
data: W_tot_herbiv$alle and W_tot_herbiv$vill
t = 0.77779, df = 23, p-value = 0.4446
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-144.3215 318.2382
sample estimates:
mean difference
86.95833
wilcox.test(W_tot_herbiv$alle, W_tot_herbiv$vill, paired = T) # V = 195, p-value = 0.2076
Wilcoxon signed rank exact test
data: W_tot_herbiv$alle and W_tot_herbiv$vill
V = 195, p-value = 0.2076
alternative hypothesis: true location shift is not equal to 0
mean_alle <- mean(W_tot_herbiv$alle, na.rm = TRUE)
mean_vill <- mean(W_tot_herbiv$vill, na.rm = TRUE)
### Summary
# I could only get a regression to fit log(data+1) transformed response, but found that the model without any random variables is the best fit based on AIC comparison and log likelihood ratio tests. This indicates we can ignore those random variables and conduct paired mean tests. The difference between vill and alle percent eaten is not normal based on a shapiro test (though it looks pretty normal in a histogram), but is close to normal and passes the shapiro test for total herbivory. For both, I conducted wilcox test in addition to t-tests. The level of significance does not differ between the tests. Given that t-tests are pretty robust to non-normality and that the results are of the same significance level, I only report the t-test results in the text. # Plot wild herbivory, choice feeding trials, and arrays together (removed wild herbivory subset: WH_subset - supplemental)
WH_PA + choice + array_est +
plot_layout(guides = 'collect') +
plot_annotation(tag_levels = 'A', tag_suffix = ')') &
theme(plot.tag.position = c(0, 1),
plot.tag = element_text(size = 11.5, hjust = 0, vjust = 0))ggsave("figures/Figure_3_Herbivory_patterns.png", device = "png", width = 23, height = 8, units = "cm")